{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_file = r\"C:\\Users\\t-sabeer\\Documents\\databases\\SnapshotSerengeti.json\"\n",
    "with open(data_file, 'r') as f:\n",
    "     data = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "tax_file = r\"C:\\Users\\t-sabeer\\Documents\\code\\cameraTraps\\category_data_with_taxonomy.json\"\n",
    "with open(tax_file, 'r') as f:\n",
    "     taxonomy = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "db_file = r\"C:\\Users\\t-sabeer\\Documents\\code\\cameraTraps\\inat_category_lookup_SS.p\"\n",
    "inat_cat_lookup = pickle.load(open(db_file,'rb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['empty', 0]\n",
      "['human', 1]\n"
     ]
    }
   ],
   "source": [
    "images = data['images']\n",
    "categories = data['categories']\n",
    "annotations = data['annotations']\n",
    "cat_names = [[cat['name'],cat['id']] for cat in categories]\n",
    "for cat in cat_names:\n",
    "    if cat[0] not in inat_cat_lookup:\n",
    "        print(cat)\n",
    "cat_to_id = {}\n",
    "id_to_cat = {}\n",
    "for cat in categories:\n",
    "    cat_to_id[cat['name']] = cat['id']\n",
    "    id_to_cat[cat['id']] = cat['name']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5089\n",
      "{'supercategory': 'Insecta', 'ancestors': [{'name': 'Animalia', 'rank': 'kingdom'}, {'name': 'Arthropoda', 'rank': 'phylum'}, {'name': 'Hexapoda', 'rank': 'subphylum'}, {'name': 'Insecta', 'rank': 'class'}, {'name': 'Pterygota', 'rank': 'subclass'}, {'name': 'Coleoptera', 'rank': 'order'}, {'name': 'Polyphaga', 'rank': 'suborder'}, {'name': 'Staphyliniformia', 'rank': 'suborder'}, {'name': 'Staphylinoidea', 'rank': 'superfamily'}, {'name': 'Silphidae', 'rank': 'family'}, {'name': 'Nicrophorinae', 'rank': 'subfamily'}, {'name': 'Nicrophorus', 'rank': 'genus'}], 'id': 1916, 'name': 'Nicrophorus tomentosus'}\n"
     ]
    }
   ],
   "source": [
    "print(len(taxonomy))\n",
    "print(taxonomy[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'supercategory': 'Mammalia', 'ancestors': [{'name': 'Animalia', 'rank': 'kingdom'}, {'name': 'Chordata', 'rank': 'phylum'}, {'name': 'Vertebrata', 'rank': 'subphylum'}, {'name': 'Mammalia', 'rank': 'class'}, {'name': 'Rodentia', 'rank': 'order'}, {'name': 'Sciuridae', 'rank': 'family'}, {'name': 'Xerinae', 'rank': 'subfamily'}, {'name': 'Marmotini', 'rank': 'tribe'}, {'name': 'Tamias', 'rank': 'genus'}], 'id': 230, 'name': 'Tamias dorsalis'}\n"
     ]
    }
   ],
   "source": [
    "for taxa in taxonomy:\n",
    "    if taxa['name'] == 'Tamias dorsalis':\n",
    "        print(taxa)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('gazelleGrants', 'Nanger granti'), ('reedbuck', 'Redunca'), ('dikDik', 'Madoqua kirkii'), ('zebra', 'Equus quagga'), ('porcupine', 'Hystrix cristata'), ('gazelleThomsons', 'Eudorcas thomsonii'), ('hyenaSpotted', 'Crocuta crocuta'), ('warthog', 'Phacochoerus africanus'), ('impala', 'Aepyceros melampus'), ('elephant', 'Loxodonta africana'), ('giraffe', 'Giraffa camelopardus'), ('mongoose', 'Herpestes'), ('buffalo', 'Syncerus caffer'), ('hartebeest', 'Alcelaphus buselaphus'), ('guineaFowl', 'Numidia meleagris'), ('wildebeest', 'Connochaetes taurinus'), ('leopard', 'Panthera pardus'), ('ostrich', 'Struthio camelus'), ('lionFemale', 'Panthera leo'), ('koriBustard', 'Ardeotis kori'), ('otherBird', 'Aves'), ('batEaredFox', 'Otocyon megalotis'), ('bushbuck', 'Tragelaphus scriptus'), ('jackal', 'Canis'), ('cheetah', 'Acinonyx jubatus'), ('eland', 'Tragelaphus oryx'), ('aardwolf', 'Proteles cristata'), ('hippopotamus', 'Hippopotamus amphibius'), ('hyenaStriped', 'Hyaena hyaena'), ('aardvark', 'Orycteropus afer'), ('hare', 'Lepus capensis'), ('baboon', 'Papio anubis'), ('vervetMonkey', 'Chlorocebus pygerythrus'), ('waterbuck', 'Kobus ellipsiprymnus'), ('secretaryBird', 'Sagittarius serpentarius'), ('serval', 'Felis serval'), ('lionMale', 'Panthera leo'), ('topi', 'Damaliscus korrigum'), ('honeyBadger', 'Mellivora capensis'), ('rodents', 'Rodentia'), ('wildcat', 'Felis lybica'), ('civet', 'Civettictis civetta'), ('genet', 'Genetta genetta'), ('caracal', 'Caracal caracal'), ('rhinoceros', 'Diceros bicornis'), ('reptiles', 'Reptilia'), ('zorilla', 'Ictonyx striatus')]\n"
     ]
    }
   ],
   "source": [
    "print([(cat['name'],inat_cat_lookup[cat['name']]) for cat in categories if cat['name'] in inat_cat_lookup])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "old_cat_lookup = {inat_cat_lookup[cat]: cat for cat in inat_cat_lookup}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['koriBustard', 'ostrich', 'hyenaSpotted', 'lionMale', 'baboon', 'hippopotamus', 'waterbuck', 'bushbuck', 'buffalo', 'zebra', 'secretaryBird', 'vervetMonkey', 'elephant', 'warthog', 'wildebeest', 'cheetah', 'leopard', 'impala']\n",
      "['Ardeotis kori', 'Struthio camelus', 'Crocuta crocuta', 'Panthera leo', 'Papio anubis', 'Hippopotamus amphibius', 'Kobus ellipsiprymnus', 'Tragelaphus scriptus', 'Syncerus caffer', 'Equus quagga', 'Sagittarius serpentarius', 'Chlorocebus pygerythrus', 'Loxodonta africana', 'Phacochoerus africanus', 'Connochaetes taurinus', 'Acinonyx jubatus', 'Panthera pardus', 'Aepyceros melampus']\n"
     ]
    }
   ],
   "source": [
    "map_inat_to_ss = {}\n",
    "cats_in_inat = []\n",
    "inat_cats = []\n",
    "for taxa in taxonomy:\n",
    "    match = False\n",
    "    if taxa['name'] in inat_cat_lookup.values():\n",
    "        map_inat_to_ss[taxa['id']] = cat_to_id[old_cat_lookup[taxa['name']]]\n",
    "        match = True\n",
    "        cats_in_inat.append(old_cat_lookup[taxa['name']])\n",
    "        inat_cats.append(taxa['name'])\n",
    "    else: #no direct species match\n",
    "        for a in taxa['ancestors']:\n",
    "            if a['name'] in inat_cat_lookup.values():\n",
    "                #print(a)\n",
    "                map_inat_to_ss[taxa['id']] = cat_to_id[old_cat_lookup[a['name']]]\n",
    "                match = True\n",
    "    \n",
    "print(cats_in_inat) \n",
    "print(inat_cats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "pickle.dump(map_inat_to_ss,open('map_inat_to_ss.p','wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1319\n"
     ]
    }
   ],
   "source": [
    "print(len(map_inat_to_ss))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'otherBird'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "id_to_cat[map_inat_to_ss[1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "aardvark Orycteropus afer\n",
      "aardwolf Proteles cristata\n",
      "batEaredFox Otocyon megalotis\n",
      "caracal Caracal caracal\n",
      "civet Civettictis civetta\n",
      "dikDik Madoqua kirkii\n",
      "eland Tragelaphus oryx\n",
      "gazelleGrants Nanger granti\n",
      "gazelleThomsons Eudorcas thomsonii\n",
      "genet Genetta genetta\n",
      "giraffe Giraffa camelopardus\n",
      "guineaFowl Numidia meleagris\n",
      "hare Lepus capensis\n",
      "hartebeest Alcelaphus buselaphus\n",
      "honeyBadger Mellivora capensis\n",
      "hyenaStriped Hyaena hyaena\n",
      "lionFemale Panthera leo\n",
      "porcupine Hystrix cristata\n",
      "reedbuck Redunca\n",
      "rhinoceros Diceros bicornis\n",
      "serval Felis serval\n",
      "topi Damaliscus korrigum\n",
      "wildcat Felis lybica\n",
      "zorilla Ictonyx striatus\n"
     ]
    }
   ],
   "source": [
    "for cat in inat_cat_lookup:\n",
    "    if cat_to_id[cat] not in map_inat_to_ss.values():\n",
    "        print(cat, inat_cat_lookup[cat])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'name': 'Animalia', 'rank': 'kingdom'}\n",
      "{'name': 'Arthropoda', 'rank': 'phylum'}\n",
      "{'name': 'Hexapoda', 'rank': 'subphylum'}\n",
      "{'name': 'Insecta', 'rank': 'class'}\n",
      "{'name': 'Pterygota', 'rank': 'subclass'}\n",
      "{'name': 'Coleoptera', 'rank': 'order'}\n",
      "{'name': 'Polyphaga', 'rank': 'suborder'}\n",
      "{'name': 'Staphyliniformia', 'rank': 'suborder'}\n",
      "{'name': 'Staphylinoidea', 'rank': 'superfamily'}\n",
      "{'name': 'Silphidae', 'rank': 'family'}\n",
      "{'name': 'Nicrophorinae', 'rank': 'subfamily'}\n",
      "{'name': 'Nicrophorus', 'rank': 'genus'}\n"
     ]
    }
   ],
   "source": [
    "for taxa in taxonomy[:1]:\n",
    "    for a in taxa['ancestors']:\n",
    "        print(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
